/**********************************************************************
DO FILE SUMMARY

This files cleans the HPACC dataset for our CVD prevention medication analysis,
conducts the analysis, and generates figures/tables for main text and appendices.

David Flood, for HPACC collaborators

November 8, 2022
**********************************************************************/

clear
cls
version 16	
set more off
capture log close
set showbaselevels
 
* Set graph options and scheme
set scheme s1color
graph set window fontface "Arial"

* Loading original HPACC dataset
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/2021_09_23 - Appended dataset.dta"

* Appending additional files cleaned for this analysis
drop if country == "Fiji" // the appended file has the new Fiji steps survey
append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/pooled_new_surveys - 2021.04.28.dta", nolabel

* Use the macro $S_DATE to save files with current date
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Polypill Huffman"
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")
log using "Log files/log_${date}.log", replace

////////////////////////////////////////////////////////////////////////////////
/////////////////// GENERAL DATASET CLEANING AND PREPARATION ///////////////////
////////////////////////////////////////////////////////////////////////////////

* Keep variables used here
keep country countryGDPclass WHOregionclass Population2015  /// harmonization variables
	p_id stratum_num stratum year psu_num psu svy rural  /// survey design variables
	weq_fbg wpop_fbg w_all w_fbg /// weight variables 
	age sex pregnant educat3 wealth_quintile  /// demographic
	statin aspirin mi csmoke hypt_new hypt_med hypt_med_new bp_ms hypt hbg_new rural ///
	clin_hypt sbp_avg tchol_mgdl ldl_mgdl bmi clin_dia /// variables needed to generate WHO lab risk scores
	sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 ht wt ///
	insulin_new 
	
drop if country == "India" // India hogs memory and is not included, removing early

* Randomly restricts dataset to subset of observations to increase speed while cleaning and building code
* gen random_number = runiform()
* drop if random_number > 0.2
* drop if random_number > 0.2 & inlist(country,"Iran","Mexico")

* Clean missingness ------------------------------------------------------------

	/* See rows 517-520 of codebook v3 for the following:
	
	666666666 = variable not included in the dataset
	777777777 = subject did not know
	888888888 = subject refused to answer
	999999999 = missing due to skip pattern 	
	
	I make the decision to update the codebook as follows:
	
	.n = variable not in dataset
	.m = true missing
	0 = skipped	*/	

	foreach v of varlist statin mi rural statin aspirin hypt_med {
		replace `v' = . if inlist(`v',555555555)  	// 555555555 = not eligible
		replace `v' = . if inlist(`v',666666666, 666666688)  	// 666666666 = variable not in the dataset coded as .
		replace `v' = 0 if inlist(`v',777777777,77,777777792,.d)  			// 777777777 = subject did not know
		replace `v' = .m if inlist(`v',855555554.7,855555571.2,888888888,888888896,888888896,.r)		// 888888888 = subject refused to answer
		replace `v' = 0 if inlist(`v',977777785.6,977777776.8,999999999,1000000000)  	// 999999999 = missing as skip pattern recoded as 0	
	}		
		
* Assessing and dropping countries ---------------------------------------------	
	
* Changing name of Swaziland
	replace country = "Eswatini" if country == "Swaziland"
	
* Dropping countries without statin and mi	
	drop if country == "Albania"
	// drop if country == "Armenia" // response rate <50%
	drop if country == "Belize"
	drop if country == "Brazil"
	drop if country == "Cabo Verde"
	drop if country == "Cambodia"
	drop if country == "Chile"
	drop if country == "China"
	drop if country == "Comoros"
	drop if country == "Costa Rica"
	drop if country == "Egypt"	
	drop if country == "El Salvador"
	drop if country == "Eritrea"
	drop if country == "Fiji"
	drop if country == "Fiji 2011"
	drop if country == "Gambia"
	drop if country == "Ghana"
	drop if country == "Grenada"
	drop if country == "India"
	drop if country == "Indonesia"
	drop if country == "Kazakhstan"
	drop if country == "Laos"
	drop if country == "Lesotho"
	drop if country == "Liberia"
	drop if country == "Libya"
	drop if country == "Malawi"
	drop if country == "Marshall Islands"
	drop if country == "Mexico" 
	drop if country == "Mexico ENSANUT"	// aspirin question is conditional on diabetes so cannot use
	drop if country == "Mongolia" // Using Mongolia STEPS 2019 update
	drop if country == "Mozambique"
	drop if country == "Namibia"
	// drop if country == "Nauru" // Is it an LMIC?
	drop if country == "Niger"
	drop if country == "Peru"
	drop if country == "Romania" // Statin variable available, but mi and aspirin variables not shared with HPACC
	drop if country == "Russian Federation"
	drop if country == "Rwanda"
	drop if country == "Samoa"
	drop if country == "Sao Tome and Principe"
	drop if country == "Seychelles"
	drop if country == "Sierra Leone"
	drop if country == "South Africa"
	drop if country == "South Africa DHS"
	drop if country == "Tanzania"
	drop if country == "Togo"
	drop if country == "Tonga"
	drop if country == "Ukraine"
	drop if country == "Vanuatu"
	drop if country == "Zanzibar"
	
* Adjusting World Bank fiscal groups after careful review of World Bank fiscal year income groups
	* https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups
replace countryGDPclass	= 1 if country == "Kenya"
replace countryGDPclass	= 2 if country == "Guyana"
replace countryGDPclass	= 1 if country == "Kyrgyzstan"
replace countryGDPclass	= 1 if country == "Myanmar"
replace countryGDPclass	= 1 if country == "Nepal"
replace countryGDPclass	= 3 if country == "Nauru"
	
* Replacing country names
replace country = "Mongolia" if country == "Mongolia 2019" 	// Using Mongolia STEPS 2019 update
		
* Generate a country_id variable (i.e., an id within each country)
tab country	
bysort country: gen country_id=(_n) 
label variable country_id "Within country participant number"
sort country
list country svy year if country_id == 1 // looking at country, svy type, and year in a nice list

* Encoded countries	
encode country, gen(country_encoded)	

* Clarify how many countries are in the dataset
distinct country_encoded
scalar n_countries = r(ndistinct)	

* Replacing survey year to year of data collection after meticulous check:
clonevar survey_year = year
replace survey_year = "2016-17" if country == "Algeria"
replace survey_year = "2018-19" if country == "Mexico"
replace survey_year = "2015-16" if country == "Nauru"

* Generating age categories
gen byte age_cat = .
replace age_cat = 1 if inrange(age,30,49.9)
replace age_cat = 2 if inrange(age,50,59.9)
replace age_cat = 3 if inrange(age,60,69.9)

* Labeling some variables that were not labelled in the dataset
label define sex_label 0 "Male" 1 "Female", modify
label values sex sex_label

label variable age_cat "Age category"
label define age_cat_label 1 "40-50 years" 2 "50-59 years" 3 "60-69 years",  modify
label values age_cat age_cat_label
tab age_cat

label variable educat3 "Educational attainment (3 levels)"
	label define educat3_label 0 "No schooling" 1 "Primary education" 2 "Secondary or above", modify
	label values educat3 educat3_label  

label define GDPclass 1 "LIC" 2 "LMIC" 3 "UMIC", modify
tab countryGDPclass

* Decode variables to put them in the table output
decode countryGDPclass, gen(countryGDPclass_string)
tab countryGDPclass_string

decode WHOregionclass, gen(WHOregionclass_string)
tab WHOregionclass_string

* Assessing for duplicate values
duplicates tag country p_id sbp_avg bmi, generate(dup)
list country p_id sbp_avg bmi dup if dup >0
duplicates drop country p_id sbp_avg bmi, force // 4 records in Afghanistan dropped

* Dropping any records without p_id
mdesc p_id
drop if p_id == "" // (1 observation deleted)



/*******************************************************************************
LIPID DATA CLEANING

This step merges in total cholesterol data for the surveys in Uganda, Georgia,
Nepal, and Kenya. This step is necessary as the data was not clean or available 
in the HPACC dataset.

******************************************************************************/

/*	Note: Tchol in mmol/dl is required for lab scores to run. This bit of code
	investigates data availability for tchol_mgdl. */.

* Reviewing data availability
	bysort country: mdesc tchol_mgdl
	
	/* 
	Tchol is not available (i.e., 100% missing) in the following countries and must be merged:
	
	Georgia -> 
	Kenya -> Original tchol not shared from Kenya; as of 4/20/21, WHO Repository request pending
	Nepal -> 
	Uganda -> Updated with STEPS data below; need to ping Michaela on this data update
	*/
	
* Merging Uganda lipid data
	/*	Tchol and HDL for Uganda were not incldued in the dataset originally shared
	but are part of the dataset on the repository. Here I merge in tchol (though not HDL).	*/

	frame create uganda
	frame change uganda
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/WHO repository version/uga2014.dta"

	gen country = "Uganda"
	gen tchol_mgdl_uganda = b8*38.67
	
		replace tchol_mgdl_uganda = . if tchol_mgdl>300
		replace tchol_mgdl_uganda = . if tchol_mgdl<3
	
	rename pid p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_uganda	

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/Uganda lipid merge/Uganda cleaned dataset for merge.dta", replace


	frame change default
	frame drop uganda
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/Uganda lipid merge/Uganda cleaned dataset for merge.dta"
		
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_uganda if country == "Uganda"
	drop tchol_mgdl_uganda
	
	* browse hdl_mgdl tchol_mgdl if country == "Uganda"

* Merging Georgia lipid data
	/*	Tchol and HDL for Georgia were not included. Here I merge in tchol (though not HDL).	*/

	frame create georgia
	frame change georgia
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia dataset.dta", clear

	gen country = "Georgia"
	gen tchol_mgdl_georgia = b8*38.67
	
		replace tchol_mgdl_georgia = . if tchol_mgdl>300
		replace tchol_mgdl_georgia = . if tchol_mgdl<3
	
	rename qr p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_georgia

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia lipid merge", replace


	frame change default
	frame drop georgia
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia lipid merge.dta"
	
	drop if _merge == 2 // 8 records with almost complete missing data dropped from raw dataset
	drop _merge
	replace tchol_mgdl = tchol_mgdl_georgia if country == "Georgia"
	drop tchol_mgdl_georgia
	
	* browse hdl_mgdl tchol_mgdl if country == "Georgia"
	
* Merging Nepal; lipid data
	/*	Tchol and HDL for Nepal were not included. Here I merge in tchol (though not HDL).	*/

	frame create nepal
	frame change nepal
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/npl2019.dta", clear

	gen country = "Nepal"
	gen tchol_mgdl_nepal = b8
	
		replace tchol_mgdl_nepal = . if tchol_mgdl>300
		replace tchol_mgdl_nepal = . if tchol_mgdl<3
	
	rename pid p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_nepal

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/nepal lipid merge.dta", replace


	frame change default
	frame drop nepal
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/nepal lipid merge.dta"
	
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_nepal if country == "Nepal"
	drop tchol_mgdl_nepal
	
	* browse hdl_mgdl tchol_mgdl if country == "Nepal"
	
* Merging Kenya lipid data
	/*	Tchol and HDL for Kenya were not incldued in the dataset originally shared
	but are part of the dataset on the repository. Here I merge in tchol (though not HDL).	*/

	frame create kenya
	frame change kenya
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya 2015 STEPS repository data/ken2015.dta"

	gen country = "Kenya"
	gen tchol_mgdl_kenya = b8*38.67
	
		replace tchol_mgdl_kenya = . if tchol_mgdl>300
		replace tchol_mgdl_kenya = . if tchol_mgdl<3
	
	rename (age sex m4a m4b m5a m5b m6a m6b m11 m12 b5) (age sex_string sbp1 dbp1 sbp2 dbp2 sbp3  dbp3 ht wt fbg_new_kenya)
	
	gen sex = 1 if sex_string == "Women"
	replace sex = 0 if sex_string == "Men"
	
	tostring psu, replace
	sort psu
	
	* Assessing for duplicate values 1
	duplicates tag country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3, generate(dup)
	list country country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl fbg_new_kenya if dup >0
	drop if dup >0 & tchol_mgdl == .
	duplicates drop country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3, force // 2 records dropped with tchol 
	
	keep country sex psu age sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl_kenya

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya lipid merge/Kenya cleaned dataset for merge.dta", replace


	frame change default
	frame drop kenya
	merge m:1 country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya lipid merge/Kenya cleaned dataset for merge.dta"
	* browse country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl tchol_mgdl_kenya _merge if country == "Kenya"
	
	drop if _merge == 2
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_kenya if country == "Kenya"
	drop tchol_mgdl_kenya
	
	* browse hdl_mgdl tchol_mgdl if country == "Kenya"	
	
* Final tchol data check
bysort country: mdesc tchol_mgdl // only Kenya missing, see above for details on this

/*******************************************************************************
IMPORTING IN 2019 POPULATION FROM 40-69 YEARS OF AGE BY COUNTRY

This step imports in data on the
*******************************************************************************/

frame create population
frame change population

	/* 		
	
	This file imports a CSV document with the GBD population estimates for 2019 by country
	
	http://ghdx.healthdata.org/record/ihme-data/gbd-2019-population-estimates-1950-2019
	
	There is no data manipulation done to the CSV file downloaded from IHME prior
	to its import here.
	
	*/

import delimited "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/Population/IHME_GBD_2019_POP_2019_Y2020M10D15.CSV", clear 

rename (location_name) (country)

keep country year age_group_id sex_name val sex_name location_id

drop if sex_name != "both"

drop if location_id == 533
drop if country =="South Asia"
drop if country =="North Africa and Middle East"
drop sex_name sex_name

reshape wide val, i(country) j(age_group_id)

* Values 13-18 represent 5-year age categories of 40-69
keep country val13 val14 val15 val16 val17 val18 val22 // 22 is all ages

* Generates the row total of the relevant age subgroups
egen population_2019_40_69 = rowtotal(val13 val14 val15 val16 val17 val18)

* Generates total population in 2019
rename val22 population_2019_all_ages

keep country population_2019_40_69 population_2019_all_ages

* Renaming some countries
replace country = "Iran" if country == "Iran (Islamic Republic of)"
replace country = "Moldova" if country == "Republic of Moldova"
replace country = "St. Vincent & the Grenadines" if country == "Saint Vincent and the Grenadines"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Vietnam" if country == "Viet Nam"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/GBD 2019 population.dta", replace

frame change default
frame drop population

* Performing the merge
merge m:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/GBD 2019 population.dta"

* Checking merged data
list country Population2015 population_2019_40_69 population_2019_all_ages if country_id == 1

drop if _merge == 2
drop _merge
	

/**********************************************************************
Calculating 2019 WHO CVD risk

See "help whocvdrisk" for more. Required variables include:

    ccode     - 3 letter country code identifier (ISO 3166-1 alpha-3)
    sex       - Sex (1 = Male, 2 = Female)
    ages      - Age (years)
    smallbin  - Smoking status (0 = Other, 1 = Current)
    hxdiabbin - History of diabetes (0 = No, 1 = Yes)
    sbp       - Systolic blood pressure (mm Hg)
    tchol     - Total cholesterol (mmol/L)
    bmi       - Body mass index (kg/m^2)

Citation: Kaptoge S, Pennells L, De Bacquer D, Cooney MT, Kavousi M, Stevens G,
et al. World Health Organization cardiovascular disease risk charts: revised
models to estimate risk in 21 global regions. The Lancet Global Health. 2019.

Note: whocvdrisk uses "country," "year," and other variables. It will give a 
weird error if those variables are coded in the correct way.

**********************************************************************/

//	Using Kountry codes
*	Note that whocvdrisk rquires country codes in ISO 3166 alpha-3 (iso3c) format

kountry country, from(other) stuck
rename _ISO3N_ iso3n
kountry iso3n, from(iso3n) to(iso3c)
rename _ISO3C_ ccode
drop iso3n

*	Updating country codes
replace ccode = "TZA" if country == "Zanzibar"
replace ccode = "SWZ" if country == "Eswatini"
replace ccode = "TK" if country == "Tokelau"

*	Checking data 
list country ccode if country_id == 1

//	Generating 10-year WHO CVD risk using 2019 equations
clonevar ages = age
recode sex (0=1)(1=2)

// These countries don't run so we default them temporarily to Solomon Island's country code
replace ccode = "SLB" if country == "Tuvalu"
replace ccode = "SLB" if country == "Nauru"
replace ccode = "SLB" if country == "Tokelau"

//	Dummy variables needed to get whocvdrisk to run
gen hxdiabbin = clin_dia
gen tchol = tchol_mgdl/38.67 // requires in mmoll
clonevar smallbin = csmoke
rename year year_renamed
rename country country_renamed
rename sbp_avg sbp

//	Assessing codebook of required WHO variables
* codebook ccode sex ages smallbin hxdiabbin tchol sbp bmi

whocvdrisk	
rename cal2_who_cvdx_m1 who_10yr_lab
rename cal2_who_cvdx_m2 who_10yr_nonlab

//	Recoding sex back to original HPACC code
recode sex (1=0)(2=1)
rename year_renamed year
rename country_renamed country
rename sbp sbp_avg 
replace ccode = "TUV" if country == "Tuvalu"
replace ccode = "NRU" if country == "Nauru"
replace ccode = "TK" if country == "Tokelau"

*****

gen risk_above_20_lab =.
replace risk_above_20_lab = 1 if who_10yr_lab > 0.2 & who_10yr_lab <1
replace risk_above_20_lab = 0 if who_10yr_lab <= 0.2
replace risk_above_20_lab = . if missing(who_10yr_lab)

tab country risk_above_20_lab if inrange(age,40,69), missing
tab country risk_above_20_lab if inrange(age,40,69), missing

////////////////////////////////////////////////////////////////////////////////
/////////////////////////// MAIN OUTCOME DEFINITIONS ///////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
GENERATING THE SAMPLE OF PARTICIPANTS
******************************************************************************/		

* Rename hypt_med_new to facilitate macros below
rename hypt_med_new bp_med

* Total sample for use in survey characteristics table
gen byte sample_all = 1 if inrange(age,40,69) & pregnant != 1 & mi != . & !missing(statin) & !missing(aspirin) & !missing(bp_med)
tab country sample_all
tab country sample_all, m

* Primary prevention sample
gen byte samp_pri_lab = .
replace samp_pri_lab = 0 if sample_all == 1 & inlist(risk_above_20_lab,0,1)
replace samp_pri_lab = 1 if sample_all == 1 & (mi == 0 & risk_above_20_lab == 1)

replace samp_pri_lab = . if country == "Iraq"
tab country samp_pri_lab
tab country samp_pri_lab, m

* Secondary prevention sample
gen byte samp_sec = .
replace samp_sec = 0 if sample_all == 1
replace samp_sec = 1 if sample_all == 1 & mi == 1
tab country samp_sec
tab country samp_sec, m

/*******************************************************************************
GENERATING THE OUTCOMES
******************************************************************************/		

* Generate number of meds (exact number)
gen byte n_meds = .
replace n_meds = statin + aspirin + bp_med
replace n_meds = . if missing(statin) | missing(aspirin) | missing(bp_med)
tab n_meds
tab n_meds, missing

* Generate number of meds (at least)
	
	gen byte n_meds_0 = 0
	replace n_meds_0 = 1 if inlist(n_meds,0)
	tab n_meds_0
	
	gen byte n_meds_1m = 0
	replace n_meds_1m = 1 if inlist(n_meds,1,2,3)
	tab n_meds_1m
	
	gen byte n_meds_2m = 0
	replace n_meds_2m = 1 if inlist(n_meds,2,3)
	tab n_meds_2m
	
	gen byte n_meds_3m = 0
	replace n_meds_3m = 1 if inlist(n_meds,3)
	tab n_meds_3m


* Primary CVD prevention
gen byte statin_pri_lab = .
replace statin_pri_lab = 0 if samp_pri_lab == 1 & statin == 0
replace statin_pri_lab = 1 if samp_pri_lab == 1 & statin == 1
tab statin_pri_lab

gen byte aspirin_pri_lab = .
replace aspirin_pri_lab = 0 if samp_pri_lab == 1 & aspirin == 0
replace aspirin_pri_lab = 1 if samp_pri_lab == 1 & aspirin == 1
tab aspirin_pri_lab

gen byte bp_med_pri_lab = .
replace bp_med_pri_lab = 0 if samp_pri_lab == 1 & bp_med == 0
replace bp_med_pri_lab = 1 if samp_pri_lab == 1 & bp_med == 1
tab bp_med_pri_lab

gen byte n_meds_pri_lab = .
replace n_meds_pri_lab = n_meds if samp_pri_lab == 1
tab n_meds_pri_lab, m

gen byte n_meds_0_pri_lab = .
replace n_meds_0_pri_lab = n_meds_0 if samp_pri_lab == 1
tab n_meds_0_pri_lab, m

gen byte n_meds_1m_pri_lab = .
replace n_meds_1m_pri_lab = n_meds_1m if samp_pri_lab == 1
tab n_meds_1m_pri_lab, m

gen byte n_meds_2m_pri_lab = .
replace n_meds_2m_pri_lab = n_meds_2m if samp_pri_lab == 1
tab n_meds_2m_pri_lab, m

gen byte n_meds_3m_pri_lab = .
replace n_meds_3m_pri_lab = n_meds_3m if samp_pri_lab == 1
tab n_meds_3m_pri_lab, m

* Secondary CVD prevention
gen byte statin_sec = .
replace statin_sec = 0 if samp_sec == 1 & statin == 0
replace statin_sec = 1 if samp_sec == 1 & statin == 1
tab statin_sec

gen byte aspirin_sec = .
replace aspirin_sec = 0 if samp_sec == 1 & aspirin == 0
replace aspirin_sec = 1 if samp_sec == 1 & aspirin == 1
tab aspirin_sec

gen byte bp_med_sec = .
replace bp_med_sec = 0 if samp_sec == 1 & bp_med == 0
replace bp_med_sec = 1 if samp_sec == 1 & bp_med == 1
tab bp_med_sec

gen byte n_meds_sec = .
replace n_meds_sec = n_meds if samp_sec == 1
tab n_meds_sec, m

gen byte n_meds_0_sec = .
replace n_meds_0_sec = n_meds_0 if samp_sec == 1
tab n_meds_0_sec, m

gen byte n_meds_1m_sec = .
replace n_meds_1m_sec = n_meds_1m if samp_sec == 1
tab n_meds_1m_sec, m

gen byte n_meds_2m_sec = .
replace n_meds_2m_sec = n_meds_2m if samp_sec == 1
tab n_meds_2m_sec, m

gen byte n_meds_3m_sec = .
replace n_meds_3m_sec = n_meds_3m if samp_sec == 1
tab n_meds_3m_sec, m

////////////////////////////////////////////////////////////////////////////////
///////////////////// MAIN ANALYSIS USING POPULATION WEIGHTS ///////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
This analysis uses population weighting by GBD estimates of the population of
individuals ages 40-69 years of age in each country.

A sensitivity analysis using equal countries weights is performed later.
******************************************************************************/
	
foreach var of varlist ///
		samp_pri_lab samp_sec /// sample
		statin_pri_lab aspirin_pri_lab bp_med_pri_lab n_meds_0_pri_lab n_meds_1m_pri_lab n_meds_2m_pri_lab n_meds_3m_pri_lab  /// primary prevention
		statin_sec aspirin_sec bp_med_sec n_meds_0_sec n_meds_1m_sec n_meds_2m_sec n_meds_3m_sec { // secondary prevention

	* Sample weights
	gen sample = 1 if `var' != .
	
	tab `var' if sample == 1
	
	* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
	* gen wpopnr_sample=w_fbg*population_2019_40_69/sum_wpop_sample

	bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
	gen weqnr_sample=w_fbg/sum_weq_sample
	
	* Set svyset
	svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
	
	* Estimates overall output
	svy, subpop(sample): proportion `var'
	* estimates store wpop_ov_`var'
	* matrix wpop_ov_`var' = r(table)
	
	* Estimates by country
	* svy, subpop(sample): proportion `var', over(country_encoded)
	* estimates store wpop_c_`var'
	* matrix wpop_c_`var' = r(table)
	
	* Estimates by country group
	* svy, subpop(sample): proportion `var', over(WHOregionclass)
	* estimates store wpop_r_`var'
	* matrix wpop_r_`var' = r(table)
	
	* svy, subpop(sample): proportion `var', over(countryGDPclass)
	* estimates store wpop_i_`var'
	* matrix wpop_i_`var' = r(table)
	
	* Drop variables for loop
	drop sample weqnr_sample sum_weq_sample
}
			
/*******************************************************************************
FIGURE PANEL A
*******************************************************************************/		

* EXPORT DATA TO PUTEXCEL ------------------------------------------------------

* Create the Excel doc
putexcel set "Putexcel files/dataset figure_b $date.xlsx", replace 
	
* Make the table frame
putexcel A1 = "med"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"
putexcel E1 = "prevention"

putexcel A2 = 0
putexcel A6 = 0
putexcel A3 = 1
putexcel A7 = 1
putexcel A4 = 2
putexcel A8 = 2
putexcel A5 = 3
putexcel A9 = 3

putexcel E2:E5 = "primary"
putexcel E6:E9 = "secondary"

* 0 primary
	
	matlist wpop_ov_n_meds_0_pri_lab
	matrix results = wpop_ov_n_meds_0_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B2 = `est'
	putexcel C2= `lci'
	putexcel D2 = `uci'
		
* 1 primary
	
	matlist wpop_ov_n_meds_1m_pri_lab
	matrix results = wpop_ov_n_meds_1m_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B3 = `est'
	putexcel C3= `lci'
	putexcel D3 = `uci'
	
* 2 primary
	
	matlist wpop_ov_n_meds_2m_pri_lab
	matrix results = wpop_ov_n_meds_2m_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B4 = `est'
	putexcel C4= `lci'
	putexcel D4 = `uci'
		
* 3 primary
	
	matlist wpop_ov_n_meds_3m_pri_lab
	matrix results = wpop_ov_n_meds_3m_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B5 = `est'
	putexcel C5= `lci'
	putexcel D5 = `uci'
		
* 0 secondary
	
	matlist wpop_ov_n_meds_0_sec
	matrix results = wpop_ov_n_meds_0_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B6 = `est'
	putexcel C6= `lci'
	putexcel D6 = `uci'
		
* 1 secondary
	
	matlist wpop_ov_n_meds_1m_sec
	matrix results = wpop_ov_n_meds_1m_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B7 = `est'
	putexcel C7= `lci'
	putexcel D7 = `uci'
	
* 2 secondary
	
	matlist wpop_ov_n_meds_2m_sec
	matrix results = wpop_ov_n_meds_2m_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B8 = `est'
	putexcel C8= `lci'
	putexcel D8 = `uci'
		
* 3 secondary
	
	matlist wpop_ov_n_meds_3m_sec
	matrix results = wpop_ov_n_meds_3m_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B9 = `est'
	putexcel C9= `lci'
	putexcel D9 = `uci'
	
* CREATE THE FIGURE 1A ---------------------------------------------------------
		
frame create figure
frame change figure

import excel "Putexcel files/dataset figure_b $date.xlsx", sheet("Sheet1") cellrange(A1:E9) firstrow clear

* Generating the x-axis variable for twoway (see UCLA notes above)
generate order = _n
* replace  order = order + 1 if prevention == "primary"
replace  order = order + 1 if prevention == "secondary"

* Colors
local lancet_1 `" "154 166 198" "' // #9999cc
local lancet_2 `" "218 160 153" "' // #daa099
local lancet_3 `" "155 208 217" "' // #9bd0d9
local lancet_4 `" "229 231 229" "' // #e5e7e5

* Local text size macros
local text_size_big "4.5rs"	
local text_size_small "4rs"	
local text_size_mlabel "3.5rs"	

* Twoway graph
twoway ///
	bar est order if med == 0, ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_1'%100"') fintensity(80) lalign(center)	|| ///
	bar est order if med == 1, ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_2'%100"') fintensity(80) lalign(center)	|| ///
	bar est order if med == 2, ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_3'%100"') fintensity(80) lalign(center)	|| ///
	bar est order if med == 3, ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_4'%100"') fintensity(80) lalign(center)	|| ///		
	rcap lci uci order, ///
		msize(2) lcolor(black%100) lwidth(0.35) || ///
	scatter uci order, ///
		mcolor(none) mlabel(est) mlabposition(12) mlabsize(`text_size_mlabel') mlabgap(1.5rs) mlabformat(%4.1f) mlabcolor(black)          ///
	/// graph attributes
		graphregion(color(none) lcolor(none) margin(l=3 r=5 b=5 t=5)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		xsize(5) ysize(4) ///		
		/// title("Age",  position(11) margin(l=0 r=0 b=3 t=0) size(`text_size') angle(horizontal) )   ///
		/// y axis
			ytitle("Eligible individuals taking drugs (%)", margin(l=0 r=0 b=0 t=0) size(`text_size_big') angle(vertical))  	///
			yscale(r(0 100) lcolor(black) titlegap(0)) ///
			ylabel(0 (20) 100, labcolor(black) angle(horizontal) labsize(`text_size_big') grid) ///
		/// x axis
			xtitle("", margin(l=0 r=0 b=0 t=1) size(`text_size_small') angle(horizontal))  	///
			xscale(r(0.0 10) lcolor(black) titlegap(0)) ///
			xlabel(2.5 "Primary prevention*" 7.5 "Secondary prevention", ///
				labcolor(black) labgap(2.5) angle(horizontal) labsize(`text_size_small') tlength(0) ) ///
			legend( ///
				/// title("{it:Medication}", size(`text_size_big') placement(left))  ///
				on  ring(1) col(4) pos(6) region(color(none) lwidth(none)) ///
				size (`text_size_small') colgap(4) keygap(1.5) ///
				region(fcolor(none) lcolor(none) margin(l=0 r=0 b=0 t=2)) /// 
				symxsize(3rs) symysize(3rs) ///
				alignment(right) ///
				order(1 "0 drug" 2 "1+ drugs" 3 "2+ drugs" 4 "3 drugs")) ///
			name(figure_a, replace) //
	

	graph save "Figures/figure_a ${date}.gph", replace         
	graph export "Figures/figure_a ${date}.pdf", as(pdf) name(figure_a) replace
	graph export "Figures/figure_a ${date}.png", as(png) name(figure_a) replace

frame change default
frame drop figure	

/*******************************************************************************
FIGURE PANEL B
*******************************************************************************/		
		
* EXPORT DATA TO PUTEXCEL ------------------------------------------------------

* Create the Excel doc
putexcel set "Putexcel files/dataset figure_a $date.xlsx", replace 
	
* Make the table frame
putexcel A1 = "med"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"
putexcel E1 = "prevention"

putexcel A2 = "Statin"
putexcel A5 = "Statin"
putexcel A3 = "Antihypertensive"
putexcel A6 = "Antihypertensive"
putexcel A4 = "Aspirin"
putexcel A7 = "Aspirin"

putexcel E2:E4 = "primary"
putexcel E5:E7 = "secondary"

* Statin primary
	
	matlist wpop_ov_statin_pri_lab 
	matrix results = wpop_ov_statin_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B2 = `est'
	putexcel C2= `lci'
	putexcel D2 = `uci'
		
* Antihypertensive primary
	
	matlist wpop_ov_bp_med_pri_lab 
	matrix results = wpop_ov_bp_med_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B3 = `est'
	putexcel C3= `lci'
	putexcel D3 = `uci'
	
* Aspirin primary
	
	matlist wpop_ov_aspirin_pri_lab 
	matrix results = wpop_ov_aspirin_pri_lab

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B4 = `est'
	putexcel C4= `lci'
	putexcel D4 = `uci'	
		
* Statin secondary
	
	matlist wpop_ov_statin_sec 
	matrix results = wpop_ov_statin_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B5 = `est'
	putexcel C5= `lci'
	putexcel D5 = `uci'
		
* Antihypertensive secondary
	
	matlist wpop_ov_bp_med_sec
	matrix results = wpop_ov_bp_med_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B6 = `est'
	putexcel C6= `lci'
	putexcel D6 = `uci'
	
* Aspirin secondary
	
	matlist wpop_ov_aspirin_sec 
	matrix results = wpop_ov_aspirin_sec

	local est = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100
	
	putexcel B7 = `est'
	putexcel C7= `lci'
	putexcel D7 = `uci'	

* CREATE THE FIGURE 1B ---------------------------------------------------------
	
frame create figure
frame change figure

import excel "Putexcel files/dataset figure_a $date.xlsx", sheet("Sheet1") cellrange(A1:E7) firstrow clear

* Generating the x-axis variable for twoway (see UCLA notes above)
generate order = _n
* replace  order = order + 1 if prevention == "primary"
replace  order = order + 1 if prevention == "secondary"

* Colors
local lancet_1 `" "154 166 198" "' // #9999cc
local lancet_2 `" "218 160 153" "' // #daa099
local lancet_3 `" "155 208 217" "' // #9bd0d9

* Local text size macros
local text_size_big "4.5rs"	
local text_size_small "4rs"	
local text_size_mlabel "3.5rs"	

* Twoway graph
twoway ///
	bar est order if med == "Statin", ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_1'%100"') fintensity(80) lalign(center)	|| ///
	bar est order if med == "Antihypertensive", ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_2'%100"') fintensity(80) lalign(center)	|| ///
	bar est order if med == "Aspirin", ///
		xla(, valuelabel) barw(1) lwidth(thin) lcolor(black%100) fcolor(`"`lancet_3'%100"') fintensity(80) lalign(center)	|| ///		
	rcap lci uci order, ///
		msize(2) lcolor(black%100) lwidth(0.35) || ///
	scatter uci order, ///
		mcolor(none) mlabel(est) mlabposition(12) mlabsize(`text_size_mlabel') mlabgap(1.5rs) mlabformat(%4.1f) mlabcolor(black)          ///
	/// graph attributes
		graphregion(color(none) lcolor(none) margin(l=3 r=5 b=5 t=5)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		xsize(5) ysize(4) ///		
		/// title("Age",  position(11) margin(l=0 r=0 b=3 t=0) size(`text_size') angle(horizontal) )   ///
		/// y axis
			ytitle("Eligible individuals taking drugs (%)", margin(l=0 r=0 b=0 t=0) size(`text_size_big') angle(vertical))  	///
			yscale(r(0 100	) lcolor(black) titlegap(0)) ///
			ylabel(0 (20) 100, labcolor(black) angle(horizontal) labsize(`text_size_big') grid) ///
		/// x axis
			xtitle("", margin(l=0 r=0 b=0 t=1) size(`text_size_small') angle(horizontal))  	///
			xscale(r(0.0 8) lcolor(black) titlegap(0)) ///
			xlabel(2 "Primary prevention*" 6 "Secondary prevention", ///
				labcolor(black) labgap(2.5) angle(horizontal) labsize(`text_size_small') tlength(0) ) ///
			legend( ///
				/// title("{it:Medication}", size(`text_size_big') placement(center))  ///
				on  ring(1) col(3) pos(6) region(color(none) lwidth(none)) ///
				size (`text_size_small') colgap(4) keygap(1.5) ///
				region(fcolor(none) lcolor(none) margin(l=0 r=0 b=0 t=2)) /// 
				symxsize(3rs) symysize(3rs) ///
				alignment(right) ///
				order(1 "Statin" 2 "BP-lowering" 3 "Aspirin")) ///
			name(figure_b, replace) //
	

	graph save "Figures/figure_b ${date}.gph", replace         
	graph export "Figures/figure_b ${date}.pdf", as(pdf) name(figure_b) replace
	graph export "Figures/figure_b ${date}.png", as(png) name(figure_b) replace

frame change default
frame drop figure	
